today <- Sys.Date() # today's date
Mise à jour du 2021-03-09. Source des données : https://www.data.gouv.fr/fr/datasets/donnees-de-laboratoires-pour-le-depistage-indicateurs-sur-les-variants/
Légende
library("RColorBrewer")
brw <- brewer.pal(12, "Set3")
brw[2] <- brw[12] # Darker yellow
brw[10] <- brw[11] # Lighter shade
colsAge <- c("#000000FF", brw[1:10])
names(colsAge) <- c("0", "9", "19", "29", "39", "49", "59", "69", "79", "89", "90")
pchAge <- c(16, 0:9)
names(pchAge) <- names(colsAge)
cexAge <- c(1.2, rep(1, 10))
names(cexAge) <- names(colsAge)
ages <- c("tous", "0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80-89", "90+")
par(mfrow = c(1, 1))
plot(0:1, 0:1, type = "n", axes = FALSE, xlab = "", ylab = "")
legend(x = 0.5, y = 1, legend = ages, pch = pchAge, col = colsAge)
# Données France
URL <- "https://www.data.gouv.fr/fr/datasets/r/c43d7f3f-c9f5-436b-9b26-728f80e0fd52"
dataFile <- paste0("data/France_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.France <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)
# Format date
dat.France$date1 <- as.Date(substring(dat.France$semaine, 1, 10))
dat.France$date2 <- as.Date(substring(dat.France$semaine, 12, 21))
# Compute data on total tests
dat.France$Nb_tests <- dat.France$Nb_tests_PCR_TA_crible / (dat.France$Prc_tests_PCR_TA_crible / 100)
# Compare to another source
URL <- "https://www.data.gouv.fr/fr/datasets/r/dd0de5d9-b5a5-4503-930a-7b08dc0adc7c"
dataFile <- paste0("data/tests-France_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.tests <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)
URL <- "https://www.data.gouv.fr/fr/datasets/r/c1167c4e-8c89-40f2-adb3-1954f8fedfa7"
dataFile <- paste0("data/tests7j-France_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.tests7j <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)
dat.tests7j$date1 <- as.Date(substring(dat.tests7j$semaine_glissante, 1, 10))
dat.tests7j$date2 <- as.Date(substring(dat.tests7j$semaine_glissante, 12, 21))
# P nombre de tests positifs
# T nombre de tests total
dat.tests$date <- as.Date(dat.tests$jour)
dateRange <- range(c(range(dat.France$date1), range(dat.France$date2)))
subdat.tests <- dat.tests[dat.tests$date >= dateRange[1] & dat.tests$date <= dateRange[2],]
# Function to compute a sliding window
sliding.window <- function(v, winwdt = 7, pos = 4, na.rm = TRUE){
# v vector to be averaged/summed
# winwdt width of the window
# pos position of the focal day in the window
# FUN function to apply
n <- length(v)
# Initialize output vector
out <- 0 * v + (-1)
out[1:(pos-1)] <- NA
out[(n + 1 - winwdt + pos) : n] <- NA
for(i in pos : (n - winwdt + pos)){
out[i] <- mean(v[(i - pos + 1):(i + winwdt - pos)], na.rm = na.rm)
}
return(out[1:n])
}
subdat.tests.0 <- subdat.tests[subdat.tests$cl_age90 == 0, ]
dat.France.0 <- dat.France[dat.France$cl_age90 == 0, ]
subdat.tests.0$P.7.1 <- 7 * sliding.window(subdat.tests.0$P, pos = 1)
subdat.tests.0$P.7.4 <- 7 * sliding.window(subdat.tests.0$P, pos = 4)
subdat.tests.0$P.7.7 <- 7 * sliding.window(subdat.tests.0$P, pos = 7)
subdat.tests.0$P.8.8 <- 8 * sliding.window(subdat.tests.0$P, pos = 8, winwdt = 8)
plot(dat.France.0$date2, dat.France.0$Nb_tests, ylim = c(1*10^5, 2*10^5), pch = 16,
xlab = "date", ylab = "nombre de tests")
points(subdat.tests.0$date, subdat.tests.0$P.7.1, ylim = c(0, 5*10^5), col = "red")
points(subdat.tests.0$date, subdat.tests.0$P.7.4, ylim = c(0, 5*10^5), col = "green")
points(subdat.tests.0$date, subdat.tests.0$P.7.7, ylim = c(0, 5*10^5), col = "blue")
points(subdat.tests.0$date, subdat.tests.0$P.8.8, ylim = c(0, 5*10^5), col = "purple")
points(dat.tests7j$date2, dat.tests7j$P, pch = 2)
legend(x = as.Date("2021-02-18"), y = 200000, col = c("black", "red", "green", "blue", "purple", "black"), legend = c("sidep tests", "w7, c1", "w7, c4", "w7, c7", "w8, c8", "sidep 7j-fin"), pch = c(16, rep(1, 4), 2))
Legend notation:
w: width of the window, c: position of the index day.
So the sliding window is on the 7 last days. The difference may be due to the variant data being in terms of tests, and the other in terms of people, with duplicates removed.
#plot(dat.France.0$date2, dat.France.0$Nb_tests, ylim = c(1*10^5, 2*10^5), pch = 16,
# xlab = "date", ylab = "comparaison nombre de tests")
#points(subdat.tests.0$date, subdat.tests.0$P.7.7, ylim = c(0, 5*10^5), col = "blue")
#points(subdat.tests.0$date, 1.17*subdat.tests.0$P.7.7, ylim = c(0, 5*10^5), col = "orange")
dat.France.ages <- dat.France[dat.France$cl_age90 != 0,]
# All ages, time
par(las = 1)
plot(dat.France$date2, dat.France$Prc_susp_501Y_V1, ylim = c(0, 100),
col = colsAge[as.character(dat.France$cl_age90)],
pch = pchAge[as.character(dat.France$cl_age90)],
cex = cexAge[as.character(dat.France$cl_age90)],
xlab = "date", ylab = "Proportion V1", axes = FALSE)
axis(1, pos = 0, at = as.Date(unique(dat.France$date2)), labels = format(unique(dat.France$date2), format = "%b %d"))
axis(2)
ageClasses <- sort(unique(dat.France$cl_age90))
nAge <- length(ageClasses)
for(iage in unique(dat.France$cl_age90)){
sub <- dat.France[dat.France$cl_age90 == iage, ]
V1 <- sub$Prc_susp_501Y_V1/100
t <- sub$date2
s <- diff(V1) / (V1[-length(V1)]*(1-V1[-length(V1)]))
plot(t[-length(V1)], s, ylim = c(-0.1, 0.1), main = iage)
print(c(iage, mean(s)))
}
Model over time
# Create new colums with information on number of specific PCR tests
# PCR with V1 result
dat.France.ages$V1 <- dat.France.ages$Nb_susp_501Y_V1
# All other PCRs (considering NAs are non-V1)
dat.France.ages$notV1 <- dat.France.ages$Nb_tests_PCR_TA_crible - dat.France.ages$Nb_susp_501Y_V1
# All other PCRs with a result (removing NAs)
dat.France.ages$notV1.narm <- dat.France.ages$Nb_susp_501Y_V2_3 + dat.France.ages$Nb_susp_ABS
# Check that columns currently sum
all(dat.France.ages$Nb_susp_501Y_V2_3 + dat.France.ages$Nb_susp_ABS + dat.France.ages$Nb_susp_501Y_V1 + dat.France.ages$Nb_susp_IND - dat.France.ages$Nb_tests_PCR_TA_crible == 0)
## [1] TRUE
# GLM
# Assuming that all IND (indetermine) are non-V1
mdl <- glm(cbind(V1, notV1) ~ date2 + cl_age90, data = dat.France.ages, family = "binomial")
summary(mdl)
##
## Call:
## glm(formula = cbind(V1, notV1) ~ date2 + cl_age90, family = "binomial",
## data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -14.015 -5.660 -1.434 3.372 12.882
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.861e+02 6.218e+00 -158.58 <2e-16 ***
## date2 5.281e-02 3.328e-04 158.67 <2e-16 ***
## cl_age90 -7.052e-03 7.232e-05 -97.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 41863.9 on 169 degrees of freedom
## Residual deviance: 6080.8 on 167 degrees of freedom
## AIC: 7687.6
##
## Number of Fisher Scoring iterations: 3
# Ignoring IND
mdl.narm <- glm(cbind(V1, notV1.narm) ~ date2 + cl_age90, data = dat.France.ages, family = "binomial")
summary(mdl.narm)
##
## Call:
## glm(formula = cbind(V1, notV1.narm) ~ date2 + cl_age90, family = "binomial",
## data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -13.1530 -5.1441 -0.9887 3.0369 12.6468
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.102e+03 6.635e+00 -166.09 <2e-16 ***
## date2 5.903e-02 3.551e-04 166.21 <2e-16 ***
## cl_age90 -7.666e-03 7.721e-05 -99.28 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 44062.4 on 169 degrees of freedom
## Residual deviance: 5369.5 on 167 degrees of freedom
## AIC: 6955.3
##
## Number of Fisher Scoring iterations: 3
# All ages, time
par(las = 1)
plot(dat.France$date2, dat.France$Prc_susp_501Y_V2_3, ylim = c(0, 100),
col = colsAge[as.character(dat.France$cl_age90)],
pch = pchAge[as.character(dat.France$cl_age90)],
cex = cexAge[as.character(dat.France$cl_age90)],
xlab = "date", ylab = "Proportion V2/V3", axes = FALSE)
axis(1, pos = 0, at = as.Date(unique(dat.France$date2)), labels = format(unique(dat.France$date2), format = "%b %d"))
axis(2)
# Create new colums with information on number of specific PCR tests
# PCR with V2/V3 result
dat.France.ages$V23 <- dat.France.ages$Nb_susp_501Y_V2_3
# All other PCRs (considering NAs are non-V23)
dat.France.ages$notV23 <- dat.France.ages$Nb_tests_PCR_TA_crible - dat.France.ages$Nb_susp_501Y_V2_3
# All other PCRs with a result (removing NAs)
dat.France.ages$notV23.narm <- dat.France.ages$Nb_susp_501Y_V1 + dat.France.ages$Nb_susp_ABS
# GLM
# Assuming that all IND (indetermine) are non-V1
mdl <- glm(cbind(V23, notV23) ~ date2 + cl_age90, data = dat.France.ages, family = "binomial")
summary(mdl)
##
## Call:
## glm(formula = cbind(V23, notV23) ~ date2 + cl_age90, family = "binomial",
## data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.1093 -2.4440 -0.9282 1.5813 4.9755
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.019e+02 1.397e+01 -14.46 <2e-16 ***
## date2 1.066e-02 7.475e-04 14.26 <2e-16 ***
## cl_age90 -3.127e-03 1.644e-04 -19.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1646.3 on 169 degrees of freedom
## Residual deviance: 1064.3 on 167 degrees of freedom
## AIC: 2383.7
##
## Number of Fisher Scoring iterations: 4
# Ignoring IND
mdl.narm <- glm(cbind(V23, notV23.narm) ~ date2 + cl_age90, data = dat.France.ages, family = "binomial")
summary(mdl.narm)
##
## Call:
## glm(formula = cbind(V23, notV23.narm) ~ date2 + cl_age90, family = "binomial",
## data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.7362 -2.2958 -0.8464 1.5011 4.9508
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.964e+02 1.400e+01 -14.03 <2e-16 ***
## date2 1.037e-02 7.495e-04 13.83 <2e-16 ***
## cl_age90 -3.006e-03 1.654e-04 -18.18 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1503.92 on 169 degrees of freedom
## Residual deviance: 967.19 on 167 degrees of freedom
## AIC: 2285.7
##
## Number of Fisher Scoring iterations: 4
URL <- "https://www.data.gouv.fr/fr/datasets/r/73e8851a-d851-43f8-89e4-6178b35b7127"
dataFile <- paste0("data/Regions_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.regions <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)
# Format date
dat.regions$date1 <- as.Date(substring(dat.regions$semaine, 1, 10))
dat.regions$date2 <- as.Date(substring(dat.regions$semaine, 12, 21))
# Codes regions
URL <- "https://www.data.gouv.fr/en/datasets/r/34fc7b52-ef11-4ab0-bc16-e1aae5c942e7"
dataFile <- "data/coderegions.csv"
download.file(URL, dataFile)
codesRegions <- read.csv(dataFile, sep = ",", stringsAsFactors = FALSE)
# Turn into dictionary
regs <- codesRegions$nom_region
names(regs) <- as.character(codesRegions$code_region)
# Add region name
dat.regions$reg_name <- regs[as.character(dat.regions$reg)]
# dat.regions[floor(runif(10)*1000), c("reg", "reg_name")] # check a few names
# What are the other regions??
# aggregate(dat.regions$reg, by = list(dat.regions$reg), FUN = length)
dat.regions.ages <- dat.regions[dat.regions$cl_age90 != 0,]
tmp <- unique(dat.regions$reg) # Region codes
tmp <- tmp[tmp>10 & tmp <= 93] # Choose only metropolitan regions
par(mfrow = c(4, 3))
for(region in tmp){
subdat <- dat.regions[dat.regions$reg == region, ]
plot(subdat$date2, subdat$Prc_susp_501Y_V1, ylim = c(0, 100), main = regs[as.character(region)], col = colsAge[as.character(subdat$cl_age90)], pch = pchAge[as.character(subdat$cl_age90)],
xlab = "date", ylab = "Proportion V1"
)
}
# Create new colums with information on number of specific PCR tests
# PCR with V1 result
dat.regions.ages$V1 <- dat.regions.ages$Nb_susp_501Y_V1
# All other PCRs (considering NAs are non-V1)
dat.regions.ages$notV1 <- dat.regions.ages$Nb_tests_PCR_TA_crible - dat.regions.ages$Nb_susp_501Y_V1
# All other PCRs with a result (removing NAs)
dat.regions.ages$notV1.narm <- dat.regions.ages$Nb_susp_501Y_V2_3 + dat.regions.ages$Nb_susp_ABS
# Check that columns currently sum
all(dat.regions.ages$Nb_susp_501Y_V2_3 + dat.regions.ages$Nb_susp_ABS + dat.regions.ages$Nb_susp_501Y_V1 + dat.regions.ages$Nb_susp_IND - dat.regions.ages$Nb_tests_PCR_TA_crible == 0)
## [1] TRUE
dat.regions.ages$reg_name.fac <- as.factor(dat.regions.ages$reg_name)
# GLM
# Assuming that all IND (indetermine) are non-V1
mdl <- glm(cbind(V1, notV1) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl)
##
## Call:
## glm(formula = cbind(V1, notV1) ~ date2 + cl_age90 + reg_name.fac,
## family = "binomial", data = dat.regions.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.5196 -1.2720 -0.0263 0.9814 10.0068
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.018e+03 6.332e+00 -160.732 < 2e-16
## date2 5.449e-02 3.389e-04 160.782 < 2e-16
## cl_age90 -6.954e-03 7.363e-05 -94.434 < 2e-16
## reg_name.facBourgogne-Franche-Comté -4.424e-01 9.849e-03 -44.917 < 2e-16
## reg_name.facBretagne 5.461e-01 1.066e-02 51.210 < 2e-16
## reg_name.facCentre-Val de Loire 1.026e-01 1.049e-02 9.782 < 2e-16
## reg_name.facCorse 8.699e-01 3.995e-02 21.774 < 2e-16
## reg_name.facGrand Est -4.762e-01 7.535e-03 -63.198 < 2e-16
## reg_name.facGuadeloupe 1.503e+00 7.091e-02 21.198 < 2e-16
## reg_name.facGuyane -5.714e-01 1.396e-01 -4.093 4.25e-05
## reg_name.facHauts-de-France 5.066e-01 6.294e-03 80.497 < 2e-16
## reg_name.facÃŽle-de-France 4.874e-01 5.783e-03 84.281 < 2e-16
## reg_name.facLa Réunion -2.673e+00 4.384e-02 -60.962 < 2e-16
## reg_name.facMartinique 5.291e-01 6.326e-02 8.364 < 2e-16
## reg_name.facMayotte -4.892e+00 2.048e-01 -23.890 < 2e-16
## reg_name.facNormandie 1.357e-01 9.218e-03 14.725 < 2e-16
## reg_name.facNouvelle-Aquitaine 4.314e-03 8.716e-03 0.495 0.620646
## reg_name.facOccitanie 3.120e-01 7.572e-03 41.204 < 2e-16
## reg_name.facPays de la Loire -3.233e-02 8.790e-03 -3.678 0.000235
## reg_name.facProvence-Alpes-Côte d'Azur 4.445e-01 6.656e-03 66.792 < 2e-16
##
## (Intercept) ***
## date2 ***
## cl_age90 ***
## reg_name.facBourgogne-Franche-Comté ***
## reg_name.facBretagne ***
## reg_name.facCentre-Val de Loire ***
## reg_name.facCorse ***
## reg_name.facGrand Est ***
## reg_name.facGuadeloupe ***
## reg_name.facGuyane ***
## reg_name.facHauts-de-France ***
## reg_name.facÃŽle-de-France ***
## reg_name.facLa Réunion ***
## reg_name.facMartinique ***
## reg_name.facMayotte ***
## reg_name.facNormandie ***
## reg_name.facNouvelle-Aquitaine
## reg_name.facOccitanie ***
## reg_name.facPays de la Loire ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 103374 on 2874 degrees of freedom
## Residual deviance: 12153 on 2855 degrees of freedom
## (510 observations deleted due to missingness)
## AIC: 27172
##
## Number of Fisher Scoring iterations: 5
# Ignoring IND
mdl.narm <- glm(cbind(V1, notV1.narm) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl.narm)
##
## Call:
## glm(formula = cbind(V1, notV1.narm) ~ date2 + cl_age90 + reg_name.fac,
## family = "binomial", data = dat.regions.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.7191 -1.1813 0.0000 0.9934 8.1789
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.140e+03 6.797e+00 -167.653 < 2e-16
## date2 6.102e-02 3.638e-04 167.723 < 2e-16
## cl_age90 -7.544e-03 7.899e-05 -95.511 < 2e-16
## reg_name.facBourgogne-Franche-Comté -4.909e-01 1.010e-02 -48.620 < 2e-16
## reg_name.facBretagne 5.698e-01 1.122e-02 50.802 < 2e-16
## reg_name.facCentre-Val de Loire 2.218e-01 1.119e-02 19.824 < 2e-16
## reg_name.facCorse 1.131e+00 4.614e-02 24.519 < 2e-16
## reg_name.facGrand Est -4.672e-01 7.809e-03 -59.833 < 2e-16
## reg_name.facGuadeloupe 1.464e+00 7.423e-02 19.730 < 2e-16
## reg_name.facGuyane -4.394e-01 1.461e-01 -3.008 0.00263
## reg_name.facHauts-de-France 6.200e-01 6.666e-03 93.007 < 2e-16
## reg_name.facÃŽle-de-France 6.858e-01 6.156e-03 111.405 < 2e-16
## reg_name.facLa Réunion -2.617e+00 4.424e-02 -59.143 < 2e-16
## reg_name.facMartinique 4.751e-01 6.516e-02 7.291 3.07e-13
## reg_name.facMayotte -4.908e+00 2.049e-01 -23.948 < 2e-16
## reg_name.facNormandie 2.183e-01 9.762e-03 22.362 < 2e-16
## reg_name.facNouvelle-Aquitaine 5.161e-03 9.060e-03 0.570 0.56896
## reg_name.facOccitanie 3.660e-01 7.965e-03 45.956 < 2e-16
## reg_name.facPays de la Loire -4.888e-02 9.100e-03 -5.372 7.78e-08
## reg_name.facProvence-Alpes-Côte d'Azur 5.583e-01 7.059e-03 79.086 < 2e-16
##
## (Intercept) ***
## date2 ***
## cl_age90 ***
## reg_name.facBourgogne-Franche-Comté ***
## reg_name.facBretagne ***
## reg_name.facCentre-Val de Loire ***
## reg_name.facCorse ***
## reg_name.facGrand Est ***
## reg_name.facGuadeloupe ***
## reg_name.facGuyane **
## reg_name.facHauts-de-France ***
## reg_name.facÃŽle-de-France ***
## reg_name.facLa Réunion ***
## reg_name.facMartinique ***
## reg_name.facMayotte ***
## reg_name.facNormandie ***
## reg_name.facNouvelle-Aquitaine
## reg_name.facOccitanie ***
## reg_name.facPays de la Loire ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 115520 on 2844 degrees of freedom
## Residual deviance: 10258 on 2825 degrees of freedom
## (510 observations deleted due to missingness)
## AIC: 24970
##
## Number of Fisher Scoring iterations: 5
par(mfrow = c(4, 3))
for(region in tmp){
subdat <- dat.regions[dat.regions$reg == region, ]
plot(subdat$date2, subdat$Prc_susp_501Y_V2_3, ylim = c(0, 100), main = regs[as.character(region)], col = colsAge[as.character(subdat$cl_age90)], pch = pchAge[as.character(subdat$cl_age90)],
xlab = "date", ylab = "Proportion V2/V3"
)
}
# Create new colums with information on number of specific PCR tests
# PCR with V2/3 result
dat.regions.ages$V23 <- dat.regions.ages$Nb_susp_501Y_V2_3
# All other PCRs (considering NAs are non-V1)
dat.regions.ages$notV23 <- dat.regions.ages$Nb_tests_PCR_TA_crible - dat.regions.ages$Nb_susp_501Y_V2_3
# All other PCRs with a result (removing NAs)
dat.regions.ages$notV23.narm <- dat.regions.ages$Nb_susp_501Y_V1 + dat.regions.ages$Nb_susp_ABS
# GLM
# Assuming that all IND (indetermine) are non-V23
mdl <- glm(cbind(V23, notV23) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl)
##
## Call:
## glm(formula = cbind(V23, notV23) ~ date2 + cl_age90 + reg_name.fac,
## family = "binomial", data = dat.regions.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -6.9544 -0.8782 -0.2063 0.4952 5.6473
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.596e+02 1.446e+01 -11.031 < 2e-16
## date2 8.370e-03 7.741e-04 10.812 < 2e-16
## cl_age90 -3.097e-03 1.721e-04 -17.999 < 2e-16
## reg_name.facBourgogne-Franche-Comté 3.777e-02 2.568e-02 1.471 0.1413
## reg_name.facBretagne 4.743e-02 2.735e-02 1.734 0.0829
## reg_name.facCentre-Val de Loire -8.335e-01 3.952e-02 -21.090 < 2e-16
## reg_name.facCorse -1.746e+00 2.305e-01 -7.573 3.64e-14
## reg_name.facGrand Est 1.916e+00 1.458e-02 131.439 < 2e-16
## reg_name.facGuadeloupe -9.810e-01 2.314e-01 -4.240 2.23e-05
## reg_name.facGuyane -1.376e+01 2.031e+02 -0.068 0.9460
## reg_name.facHauts-de-France -4.867e-01 1.861e-02 -26.155 < 2e-16
## reg_name.facÃŽle-de-France 3.848e-01 1.465e-02 26.260 < 2e-16
## reg_name.facLa Réunion 2.896e+00 2.634e-02 109.970 < 2e-16
## reg_name.facMartinique -1.467e+00 3.350e-01 -4.381 1.18e-05
## reg_name.facMayotte 3.424e+00 3.539e-02 96.767 < 2e-16
## reg_name.facNormandie -1.435e-01 2.603e-02 -5.512 3.55e-08
## reg_name.facNouvelle-Aquitaine -1.374e-01 2.454e-02 -5.600 2.14e-08
## reg_name.facOccitanie -9.799e-01 2.816e-02 -34.800 < 2e-16
## reg_name.facPays de la Loire 5.926e-01 1.984e-02 29.875 < 2e-16
## reg_name.facProvence-Alpes-Côte d'Azur -1.851e-01 1.855e-02 -9.981 < 2e-16
##
## (Intercept) ***
## date2 ***
## cl_age90 ***
## reg_name.facBourgogne-Franche-Comté
## reg_name.facBretagne .
## reg_name.facCentre-Val de Loire ***
## reg_name.facCorse ***
## reg_name.facGrand Est ***
## reg_name.facGuadeloupe ***
## reg_name.facGuyane
## reg_name.facHauts-de-France ***
## reg_name.facÃŽle-de-France ***
## reg_name.facLa Réunion ***
## reg_name.facMartinique ***
## reg_name.facMayotte ***
## reg_name.facNormandie ***
## reg_name.facNouvelle-Aquitaine ***
## reg_name.facOccitanie ***
## reg_name.facPays de la Loire ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 72740.2 on 2874 degrees of freedom
## Residual deviance: 5582.3 on 2855 degrees of freedom
## (510 observations deleted due to missingness)
## AIC: 15796
##
## Number of Fisher Scoring iterations: 15
# Ignoring IND
mdl.narm <- glm(cbind(V23, notV23.narm) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl.narm)
##
## Call:
## glm(formula = cbind(V23, notV23.narm) ~ date2 + cl_age90 + reg_name.fac,
## family = "binomial", data = dat.regions.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.3471 -0.8218 -0.2075 0.5353 5.6238
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.603e+02 1.458e+01 -10.994 < 2e-16
## date2 8.414e-03 7.804e-04 10.781 < 2e-16
## cl_age90 -3.061e-03 1.740e-04 -17.595 < 2e-16
## reg_name.facBourgogne-Franche-Comté 2.621e-02 2.570e-02 1.020 0.308
## reg_name.facBretagne 3.726e-02 2.738e-02 1.361 0.174
## reg_name.facCentre-Val de Loire -7.882e-01 3.956e-02 -19.924 < 2e-16
## reg_name.facCorse -1.708e+00 2.306e-01 -7.408 1.28e-13
## reg_name.facGrand Est 1.964e+00 1.464e-02 134.195 < 2e-16
## reg_name.facGuadeloupe -1.031e+00 2.314e-01 -4.458 8.29e-06
## reg_name.facGuyane -1.387e+01 2.242e+02 -0.062 0.951
## reg_name.facHauts-de-France -4.660e-01 1.863e-02 -25.010 < 2e-16
## reg_name.facÃŽle-de-France 4.362e-01 1.468e-02 29.721 < 2e-16
## reg_name.facLa Réunion 3.144e+00 2.778e-02 113.176 < 2e-16
## reg_name.facMartinique -1.505e+00 3.350e-01 -4.491 7.09e-06
## reg_name.facMayotte 3.582e+00 3.730e-02 96.021 < 2e-16
## reg_name.facNormandie -1.109e-01 2.608e-02 -4.252 2.12e-05
## reg_name.facNouvelle-Aquitaine -1.353e-01 2.457e-02 -5.505 3.69e-08
## reg_name.facOccitanie -9.703e-01 2.818e-02 -34.435 < 2e-16
## reg_name.facPays de la Loire 5.891e-01 1.987e-02 29.647 < 2e-16
## reg_name.facProvence-Alpes-Côte d'Azur -1.587e-01 1.857e-02 -8.547 < 2e-16
##
## (Intercept) ***
## date2 ***
## cl_age90 ***
## reg_name.facBourgogne-Franche-Comté
## reg_name.facBretagne
## reg_name.facCentre-Val de Loire ***
## reg_name.facCorse ***
## reg_name.facGrand Est ***
## reg_name.facGuadeloupe ***
## reg_name.facGuyane
## reg_name.facHauts-de-France ***
## reg_name.facÃŽle-de-France ***
## reg_name.facLa Réunion ***
## reg_name.facMartinique ***
## reg_name.facMayotte ***
## reg_name.facNormandie ***
## reg_name.facNouvelle-Aquitaine ***
## reg_name.facOccitanie ***
## reg_name.facPays de la Loire ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 74449.0 on 2844 degrees of freedom
## Residual deviance: 5479.8 on 2825 degrees of freedom
## (510 observations deleted due to missingness)
## AIC: 15614
##
## Number of Fisher Scoring iterations: 15
URL <- "https://www.data.gouv.fr/fr/datasets/r/16f4fd03-797f-4616-bca9-78ff212d06e8"
dataFile <- paste0("data/Dep_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.deps <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)
# Format date
dat.deps$date1 <- as.Date(substring(dat.deps$semaine, 1, 10))
dat.deps$date2 <- as.Date(substring(dat.deps$semaine, 12, 21))
# Add name
deps <- read.csv("data/departement2020.csv", stringsAsFactors = FALSE)
# Turn into dictionnary
dps <- deps$libelle
names(dps) <- as.character(deps$dep)
dat.deps$departement <- dps[as.character(dat.deps$dep)]
par(mfrow = c(35, 3))
for(idep in sort(unique(dat.deps$dep))){
tmp <- dat.deps[dat.deps$dep == idep, ]
plot(tmp$date2, tmp$Prc_susp_501Y_V1, ylim = c(0, 100), col = colsAge[as.character(tmp$cl_age90)], pch = pchAge[as.character(tmp$cl_age90)],
xlab = "date", ylab = "Proportion V1",
main = unique(tmp$departement))
# legend(x = min(dat.deps$date2), y = 100, legend = ages, pch = pchAge, col = colsAge)
}
par(mfrow = c(35, 3))
for(idep in sort(unique(dat.deps$dep))){
tmp <- dat.deps[dat.deps$dep == idep, ]
plot(tmp$date2, tmp$Prc_susp_501Y_V2_3, ylim = c(0, 100), col = colsAge[as.character(tmp$cl_age90)], pch = pchAge[as.character(tmp$cl_age90)],
xlab = "date", ylab = "Proportion V2/V3",
main = unique(tmp$departement))
# legend(x = min(dat.deps$date2), y = 100, legend = ages, pch = pchAge, col = colsAge)
}